#This script calculates cosine similarities with the opposition minus support vector
#It does this for each country and each version (sample size)
#It does not use normalization as in the main analyses
library(quanteda)
library(conText)
library(dplyr)
library(text2vec)
source("utils.R")

# Seed the random number generator with a fixed value
set.seed(123L)

# Country identifiers to iterate over
countries <- c(
  "djazairess",
  "maghress",
  "masress",
  "sauress",
  "turess"
)

# Single-word anchors for the opposition-minus-support vector:
# first_ar is the "opposition" term, second_ar the "support" term
first_ar <- "المعارضة"
second_ar <- "الدعم"

# Get cosine similarities for one country.
#
# Thin wrapper around get_similarity_scores2(), which is not defined in this
# file (presumably it comes from utils.R -- TODO confirm). The helper is pointed
# at "data/analysis_toks/<country_name>_target_toks_leader.rds" and receives
# the script-level anchor words first_ar / second_ar, group_var = "yearwk",
# window = 12L and norm = "none".
#
# Arguments:
#   country_name    Identifier used to build the target-tokens file path.
#   local_glove     Forwarded to the helper as `pre_trained`.
#   local_transform Forwarded to the helper as `transform_matrix`.
#
# Returns: whatever get_similarity_scores2() returns, unchanged.
process_cos_sim <-
  function(country_name,
           local_glove,
           local_transform) {
    # Announce the country BEFORE the computation starts, so the log shows
    # which country is currently being processed rather than which one has
    # just finished.
    cat("Getting cos_sims for country ", country_name, "\n")

    target_toks_file <- paste0(
      "data/analysis_toks/",
      country_name,
      "_target_toks_leader.rds"
    )

    # Read in and compute from target toks
    get_similarity_scores2(
      target_toks_file = target_toks_file,
      first_vec = first_ar,
      second_vec = second_ar,
      pre_trained = local_glove,
      transform_matrix = local_transform,
      group_var = "yearwk",
      window = 12L,
      norm = "none"
    )
  }

# Versions to process: each sample size is rendered as a plain (non-scientific)
# digit string, one value at a time so no padding to a common width is added,
# and then suffixed with "30k".
format_plain <- function(size) {
  format(size, scientific = FALSE)
}
sample_sizes <- vapply(
  c(1e4, 5e4, 1e5, 5e5, 1e6, 1.5e6),
  format_plain,
  character(1)
)
versions <- paste0(sample_sizes, "30k")

# Process one embedding version for every country.
#
# Reads the combined transform matrix and GloVe embedding for `version` from
# "data/embedding_combined/", then, for each entry of the script-level
# `countries` vector, calls process_cos_sim() and writes the result to
# "data/output/cos_sims/<country>/cos_simsdf_all_ip<version>.rds".
#
# Arguments:
#   version  String suffix identifying the embedding files to load; it is also
#            embedded in the output file name.
#
# Returns: NULL, invisibly. The function is called for its side effect of
# writing one .rds file per country.
process_version <- function(version) {
  # Read the combined embedding for the specific version
  embedding_dir <- "data/embedding_combined"
  local_transform <- readRDS(file.path(
    embedding_dir,
    paste0("combined_local_transform", version, ".rds")
  ))
  local_glove <- readRDS(file.path(
    embedding_dir,
    paste0("combined_local_glove", version, ".rds")
  ))

  for (country in countries) {
    # Create the country's output directory up front. `recursive = TRUE` also
    # creates missing parents such as "data/output/cos_sims", and doing it
    # before the computation means an unwritable location fails early instead
    # of after the work is done.
    dir_name <- file.path("data/output/cos_sims", country)
    dir.create(dir_name, recursive = TRUE, showWarnings = FALSE)
    if (!dir.exists(dir_name)) {
      stop("Could not create output directory: ", dir_name, call. = FALSE)
    }

    # Save each country's result as soon as it is available, so a failure for
    # a later country does not discard results already computed.
    result <- process_cos_sim(country, local_glove, local_transform)
    saveRDS(
      result,
      file.path(dir_name, paste0("cos_simsdf_all_ip", version, ".rds"))
    )
  }

  invisible(NULL)
}

# Loop through versions and apply the process_version function.
# process_version() is called only for its side effect of writing files, so
# the collected return values are wrapped in invisible() to keep them from
# being auto-printed when the script is run non-interactively.
invisible(lapply(versions, process_version))
